library('tidyverse')
## Warning: 'timedatectl' indicates the non-existent timezone name 'n/a'
## Warning: Your system is mis-configured: '/etc/localtime' is not a symlink
## Warning: It is strongly recommended to set envionment variable TZ to 'Etc/UCT'
## (or equivalent)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.0 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.1 ✔ tibble 3.1.8
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors
library('ballgown')
##
## Attaching package: 'ballgown'
##
## The following objects are masked from 'package:dplyr':
##
## contains, expr, last
##
## The following object is masked from 'package:tidyr':
##
## contains
##
## The following object is masked from 'package:ggplot2':
##
## expr
##
## The following object is masked from 'package:base':
##
## structure
library('plotly')
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
ids = c("cdc5_1_S9","cdc5_2_S10","CmasM_1_S15","CmasM_2_S16","EhMyb10_1_S11","EhMyb10_2_S12","pEhEx_2_S8", "pEhEx1_S7")
type = c("EhCDC5-like","EhCDC5-like","EhCDC5-like+EhMyb10","EhCDC5-like+EhMyb10","EhMyb10","EhMyb10","Control","Control")
results = "~/ballgown_r2"
path = paste(results,ids,sep="/")
pheno_data = data.frame(ids,type,path)
pheno_data
bg2 = ballgown(dataDir = "~/ballgown_r2/", samplePattern = "_S", pData=pheno_data)
## Sun Aug 27 06:32:48 2023
## Sun Aug 27 06:32:48 2023: Reading linking tables
## Sun Aug 27 06:32:48 2023: Reading intron data files
## Sun Aug 27 06:32:48 2023: Merging intron data
## Sun Aug 27 06:32:48 2023: Reading exon data files
## Sun Aug 27 06:32:48 2023: Merging exon data
## Sun Aug 27 06:32:48 2023: Reading transcript data files
## Sun Aug 27 06:32:49 2023: Merging transcript data
## Wrapping up the results
## Sun Aug 27 06:32:49 2023
bg_table2 = texpr(bg2, meas="all")
bg_table2 %>% head()
pData(bg2)
bg_table2 %>%
select(c(t_name, num_exons, length), starts_with("FPKM")) %>%
mutate_at(vars(starts_with("FPKM")), ~ log2(.x+1)) %>%
pivot_longer(cols=starts_with("FPKM"), values_to = "FPKM", names_to = "Sample") %>%
mutate(Sample=str_replace(Sample, "FPKM.", "")) %>%
ggplot(aes(x=Sample, y=FPKM, fill=Sample)) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=1)) -> p
ggplotly(p)
stat_results = stattest(bg2, feature='transcript', meas='FPKM', covariate='type')
stat_results %>% head()
stat_results %>% filter(qval<=0.06)
bg_table2 %>%
select(c(t_name, num_exons, length), starts_with("FPKM")) %>%
mutate_at(vars(starts_with("FPKM")), ~ log2(.x+1)) %>%
pivot_longer(cols=starts_with("FPKM"), values_to = "FPKM", names_to = "Sample") %>%
mutate(Sample=str_replace(Sample, "FPKM.", "")) %>%
mutate(Type = str_replace(Sample, "_*(1|2)_S\\d+", "")) %>%
mutate(Type = str_replace(Type, "CmasM", "EhCDC5-like+EhMyb10")) %>%
mutate(Type = str_replace(Type, "cdc5", "EhCDC5-like")) %>%
mutate(Type = str_replace(Type, "pEhEx", "Control")) %>%
group_by(t_name, Type) %>%
summarise(FPKM=mean(FPKM))-> tmp_data
## `summarise()` has grouped output by 't_name'. You can override using the
## `.groups` argument.
tmp_data$Type = factor(tmp_data$Type, levels=c("Control","EhMyb10","EhCDC5-like","EhCDC5-like+EhMyb10"))
tmp_data %>%
ggplot(aes(x=Type, y=FPKM, fill=Type)) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=1)) -> p
ggplotly(p)
bg_table2 %>%
select(c(t_name, num_exons, length), starts_with("FPKM")) %>%
mutate(mean_FPKM_pEhEx=(FPKM.pEhEx_2_S8+FPKM.pEhEx1_S7)/2) %>%
mutate_at(vars(starts_with("FPKM")), ~ .x/mean_FPKM_pEhEx) %>% head()
bg_table2 %>%
select(c(t_name, num_exons, length), starts_with("FPKM")) %>%
mutate(exprpEhEx=(FPKM.pEhEx_2_S8+FPKM.pEhEx1_S7)/2) %>%
mutate_at(vars(starts_with("FPKM")), ~ .x/exprpEhEx) %>%
mutate_at(vars(starts_with("FPKM")), ~ log2(.x)) %>%
pivot_longer(cols=starts_with("FPKM"), values_to = "log2FC", names_to = "Sample") %>%
mutate(Sample=str_replace(Sample, "FPKM.", "")) %>%
mutate(Type = str_replace(Sample, "_*(1|2)_S\\d+", "")) %>%
mutate(Type = str_replace(Type, "CmasM", "EhCDC5-like+EhMyb10")) %>%
mutate(Type = str_replace(Type, "cdc5", "EhCDC5-like")) %>%
mutate(Type = str_replace(Type, "pEhEx", "Control")) %>%
group_by(t_name, Type) %>%
summarise(log2FC=mean(log2FC)) -> tmp_data
## `summarise()` has grouped output by 't_name'. You can override using the
## `.groups` argument.
tmp_data$Type = factor(tmp_data$Type, levels=c("Control","EhMyb10","EhCDC5-like","EhCDC5-like+EhMyb10"))
tmp_data %>%
ggplot(aes(x=Type, y=log2FC, fill=Type)) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 20, vjust = 0.5, hjust=1)) -> p
ggplotly(p)
## Warning: Removed 8799 rows containing non-finite values (`stat_boxplot()`).
bg_table2 %>%
select(c(t_name, num_exons, length), starts_with("FPKM")) %>%
mutate(exprpEhEx=(FPKM.pEhEx_2_S8+FPKM.pEhEx1_S7)/2) %>%
mutate_at(vars(starts_with("FPKM")), ~ .x/exprpEhEx) %>%
mutate_at(vars(starts_with("FPKM")), ~ log2(.x)) %>%
pivot_longer(cols=starts_with("FPKM"), values_to = "log2FC", names_to = "Sample") %>%
mutate(Sample=str_replace(Sample, "FPKM.", "")) %>%
filter(!grepl("^U",Sample)) %>%
ggplot(aes(x=exprpEhEx, y=log2FC, color=Sample)) +
geom_point() +
scale_x_log10() +
scale_color_brewer(palette = "Set1")
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 8213 rows containing missing values (`geom_point()`).

bg_table2 %>%
select(c(t_name, gene_id,
num_exons, length), starts_with("FPKM")) %>%
mutate(exprpEhEx=(FPKM.pEhEx_2_S8+FPKM.pEhEx1_S7)/2) %>%
mutate_at(vars(starts_with("FPKM")), ~ .x/exprpEhEx) %>%
mutate_at(vars(starts_with("FPKM")), ~ log2(.x)) %>%
pivot_longer(cols=starts_with("FPKM"), values_to = "log2FC", names_to = "Sample") %>%
mutate(Sample=str_replace(Sample, "FPKM.", "")) %>%
filter(abs(log2FC)>5) -> expr_dif
expr_dif %>%
ggplot(aes(x=exprpEhEx, y=log2FC, color=Sample, text=t_name)) +
geom_point() +
scale_x_log10() -> p
ggplotly(p)
## Warning: Transformation introduced infinite values in continuous x-axis
expr_dif %>% filter(num_exons>1 & is.finite(log2FC)) %>% arrange(log2FC)# %>% head()
Transcripts of interest
bg_table2 %>% filter(grepl("EHI_153670|EHI_139370|EHI_200710|EHI_158190|EHI_129760|EHI_181610|EHI_167620|EHI_087610",t_name)) %>% select(t_name,gene_id) -> lookupGenesTable
plotMeans(lookupGenesTable$gene_id[grepl("EHI_200710",lookupGenesTable$t_name)], bg2, groupvar='type', meas='FPKM', colorby='transcript')

plotMeans(lookupGenesTable$gene_id[grepl("EHI_139370",lookupGenesTable$t_name)], bg2, groupvar='type', meas='FPKM', colorby='transcript')

plotMeans(lookupGenesTable$gene_id[grepl("EHI_158190",lookupGenesTable$t_name)], bg2, groupvar='type', meas='FPKM', colorby='transcript')

plotMeans(lookupGenesTable$gene_id[grepl("EHI_153670",lookupGenesTable$t_name)], bg2, groupvar='type', meas='FPKM', colorby='transcript')

plotMeans(lookupGenesTable$gene_id[grepl("EHI_129760",lookupGenesTable$t_name)], bg2, groupvar='type', meas='FPKM', colorby='transcript')

plotMeans(lookupGenesTable$gene_id[grepl("EHI_181610",lookupGenesTable$t_name)], bg2, groupvar='type', meas='FPKM', colorby='transcript')

plotMeans(lookupGenesTable$gene_id[grepl("EHI_167620",lookupGenesTable$t_name)], bg2, groupvar='type', meas='FPKM', colorby='transcript')

plotMeans(lookupGenesTable$gene_id[grepl("EHI_087610",lookupGenesTable$t_name)], bg2, groupvar='type', meas='FPKM', colorby='transcript')

plotMeans("MSTRG.2631", bg2, groupvar='type', meas='FPKM', colorby='transcript')
